}
/* calls in xen/common code that are unused on ia64 */
-void synchronise_pagetables(unsigned long cpu_mask) { return; }
+void synchronise_execution_state(unsigned long cpu_mask) { }
int grant_table_create(struct domain *d) { return 0; }
void grant_table_destroy(struct domain *d)
static int opt_noreboot = 0;
boolean_param("noreboot", opt_noreboot);
+struct percpu_ctxt {
+ struct exec_domain *curr_ed;
+} __cacheline_aligned;
+static struct percpu_ctxt percpu_ctxt[NR_CPUS];
+
static void default_idle(void)
{
- __cli();
+ local_irq_disable();
if ( !softirq_pending(smp_processor_id()) )
safe_halt();
else
- __sti();
+ local_irq_enable();
}
static __attribute_used__ void idle_loop(void)
{
/* Just some sanity to ensure that the scheduler is set up okay. */
ASSERT(current->domain->id == IDLE_DOMAIN_ID);
+ percpu_ctxt[smp_processor_id()].curr_ed = current;
+ set_bit(smp_processor_id(), &current->domain->cpuset);
domain_unpause_by_systemcontroller(current->domain);
raise_softirq(SCHEDULE_SOFTIRQ);
do_softirq();
safe_halt();
}
- __sti();
+ local_irq_enable();
/* Ensure we are the boot CPU. */
if ( GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid )
struct pfn_info *mmfn_info;
struct domain *d = ed->domain;
- ASSERT(!pagetable_val(ed->arch.monitor_table)); /* we should only get called once */
+ ASSERT(pagetable_val(ed->arch.monitor_table) == 0);
mmfn_info = alloc_domheap_page(NULL);
- ASSERT( mmfn_info );
+ ASSERT(mmfn_info != NULL);
mmfn = (unsigned long) (mmfn_info - frame_table);
mpl2e = (l2_pgentry_t *) map_domain_mem(mmfn << PAGE_SHIFT);
ed->arch.monitor_vtable = mpl2e;
- // map the phys_to_machine map into the Read-Only MPT space for this domain
+ /* Map the p2m map into the Read-Only MPT space for this domain. */
mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
mk_l2_pgentry(pagetable_val(ed->arch.phys_table) | __PAGE_HYPERVISOR);
: "=r" (__r) : "r" (value), "0" (__r) );\
__r; })
-static void switch_segments(
- struct xen_regs *regs, struct exec_domain *p, struct exec_domain *n)
+static void load_segments(struct exec_domain *p, struct exec_domain *n)
{
int all_segs_okay = 1;
- if ( !is_idle_task(p->domain) )
- {
- __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (p->arch.user_ctxt.ds) );
- __asm__ __volatile__ ( "movl %%es,%0" : "=m" (p->arch.user_ctxt.es) );
- __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (p->arch.user_ctxt.fs) );
- __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (p->arch.user_ctxt.gs) );
- }
-
/* Either selector != 0 ==> reload. */
if ( unlikely(p->arch.user_ctxt.ds |
n->arch.user_ctxt.ds) )
if ( unlikely(!all_segs_okay) )
{
- unsigned long *rsp =
+ struct xen_regs *regs = get_execution_context();
+ unsigned long *rsp =
(n->arch.flags & TF_kernel_mode) ?
(unsigned long *)regs->rsp :
(unsigned long *)n->arch.kernel_sp;
}
}
+static void save_segments(struct exec_domain *p)
+{
+ __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (p->arch.user_ctxt.ds) );
+ __asm__ __volatile__ ( "movl %%es,%0" : "=m" (p->arch.user_ctxt.es) );
+ __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (p->arch.user_ctxt.fs) );
+ __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (p->arch.user_ctxt.gs) );
+}
+
+static void clear_segments(void)
+{
+ __asm__ __volatile__ (
+ "movl %0,%%ds; "
+ "movl %0,%%es; "
+ "movl %0,%%fs; "
+ "movl %0,%%gs; swapgs; movl %0,%%gs"
+ : : "r" (0) );
+}
+
long do_switch_to_user(void)
{
struct xen_regs *regs = get_execution_context();
#elif defined(__i386__)
-#define switch_segments(_r, _p, _n) ((void)0)
+#define load_segments(_p, _n) ((void)0)
+#define save_segments(_p) ((void)0)
+#define clear_segments() ((void)0)
#endif
-/*
- * This special macro can be used to load a debugging register
- */
#define loaddebug(_ed,_reg) \
- __asm__("mov %0,%%db" #_reg \
- : /* no output */ \
- :"r" ((_ed)->debugreg[_reg]))
+ __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_ed)->debugreg[_reg]))
-void context_switch(struct exec_domain *prev_p, struct exec_domain *next_p)
+static void __context_switch(void)
{
-#ifdef __i386__
- struct tss_struct *tss = init_tss + smp_processor_id();
-#endif
execution_context_t *stack_ec = get_execution_context();
+ unsigned int cpu = smp_processor_id();
+ struct exec_domain *p = percpu_ctxt[cpu].curr_ed;
+ struct exec_domain *n = current;
- __cli();
-
- /* Switch guest general-register state. */
- if ( !is_idle_task(prev_p->domain) )
+ if ( !is_idle_task(p->domain) )
{
- memcpy(&prev_p->arch.user_ctxt,
+ memcpy(&p->arch.user_ctxt,
stack_ec,
sizeof(*stack_ec));
- unlazy_fpu(prev_p);
- CLEAR_FAST_TRAP(&prev_p->arch);
+ unlazy_fpu(p);
+ CLEAR_FAST_TRAP(&p->arch);
+ save_segments(p);
}
- if ( !is_idle_task(next_p->domain) )
- {
- memcpy(stack_ec,
- &next_p->arch.user_ctxt,
- sizeof(*stack_ec));
+ memcpy(stack_ec,
+ &n->arch.user_ctxt,
+ sizeof(*stack_ec));
- /* Maybe switch the debug registers. */
- if ( unlikely(next_p->arch.debugreg[7]) )
- {
- loaddebug(&next_p->arch, 0);
- loaddebug(&next_p->arch, 1);
- loaddebug(&next_p->arch, 2);
- loaddebug(&next_p->arch, 3);
- /* no 4 and 5 */
- loaddebug(&next_p->arch, 6);
- loaddebug(&next_p->arch, 7);
- }
+ /* Maybe switch the debug registers. */
+ if ( unlikely(n->arch.debugreg[7]) )
+ {
+ loaddebug(&n->arch, 0);
+ loaddebug(&n->arch, 1);
+ loaddebug(&n->arch, 2);
+ loaddebug(&n->arch, 3);
+ /* no 4 and 5 */
+ loaddebug(&n->arch, 6);
+ loaddebug(&n->arch, 7);
+ }
- if ( !VMX_DOMAIN(next_p) )
- {
- SET_FAST_TRAP(&next_p->arch);
+ if ( !VMX_DOMAIN(n) )
+ {
+ SET_FAST_TRAP(&n->arch);
#ifdef __i386__
+ {
/* Switch the kernel ring-1 stack. */
- tss->esp1 = next_p->arch.kernel_sp;
- tss->ss1 = next_p->arch.kernel_ss;
-#endif
+ struct tss_struct *tss = &init_tss[cpu];
+ tss->esp1 = n->arch.kernel_sp;
+ tss->ss1 = n->arch.kernel_ss;
}
-
- /* Switch page tables. */
- write_ptbase(next_p);
+#endif
}
- set_current(next_p);
+ set_bit(cpu, &n->domain->cpuset);
+ write_ptbase(n);
+ clear_bit(cpu, &p->domain->cpuset);
- __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->arch.gdt));
+ __asm__ __volatile__ ( "lgdt %0" : "=m" (*n->arch.gdt) );
+
+ percpu_ctxt[cpu].curr_ed = n;
+}
- __sti();
- if ( !VMX_DOMAIN(next_p) )
+void context_switch(struct exec_domain *prev, struct exec_domain *next)
+{
+ struct exec_domain *realprev;
+
+ local_irq_disable();
+
+ set_current(next);
+
+ if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_ed) == next) ||
+ is_idle_task(next->domain) )
{
- load_LDT(next_p);
- switch_segments(stack_ec, prev_p, next_p);
+ local_irq_enable();
+ }
+ else
+ {
+ __context_switch();
+
+ local_irq_enable();
+
+ if ( !VMX_DOMAIN(next) )
+ {
+ load_LDT(next);
+ load_segments(realprev, next);
+ }
}
/*
* 'prev' (after this point, a dying domain's info structure may be freed
* without warning).
*/
- clear_bit(EDF_RUNNING, &prev_p->ed_flags);
+ clear_bit(EDF_RUNNING, &prev->ed_flags);
- schedule_tail(next_p);
+ schedule_tail(next);
BUG();
}
+static void __synchronise_lazy_execstate(void *unused)
+{
+ if ( percpu_ctxt[smp_processor_id()].curr_ed != current )
+ {
+ __context_switch();
+ load_LDT(current);
+ clear_segments();
+ }
+}
+void synchronise_lazy_execstate(unsigned long cpuset)
+{
+ smp_subset_call_function(__synchronise_lazy_execstate, NULL, 1, cpuset);
+}
+
unsigned long __hypercall_create_continuation(
unsigned int op, unsigned int nr_args, ...)
{
{
struct exec_domain *ed;
- /* Ensure that noone is running over the dead domain's page tables. */
- synchronise_pagetables(~0UL);
+ BUG_ON(d->cpuset != 0);
/* Release device mappings of other domains */
gnttab_release_dev_mappings( d->grant_table );
-
/* Exit shadow mode before deconstructing final guest page table. */
shadow_mode_disable(d);
update_pagetables(ed);
/* Install the new page tables. */
- __cli();
+ local_irq_disable();
write_ptbase(ed);
/* Copy the OS image and free temporary buffer. */
/* Reinstate the caller's page tables. */
write_ptbase(current);
- __sti();
+ local_irq_enable();
#if defined(__i386__)
/* Destroy low mappings - they were only for our convenience. */
* may be unnecessary (e.g., page was GDT/LDT) but those
* circumstances should be very rare.
*/
- struct exec_domain *ed;
- unsigned long mask = 0;
- for_each_exec_domain ( page_get_owner(page), ed )
- mask |= 1 << ed->processor;
- mask = tlbflush_filter_cpuset(mask, page->tlbflush_timestamp);
+ unsigned long cpuset = tlbflush_filter_cpuset(
+ page_get_owner(page)->cpuset, page->tlbflush_timestamp);
- if ( unlikely(mask != 0) )
+ if ( unlikely(cpuset != 0) )
{
perfc_incrc(need_flush_tlb_flush);
- flush_tlb_mask(mask);
+ flush_tlb_mask(cpuset);
}
/* We lose existing type, back pointer, and validity. */
if ( d != current->domain )
domain_pause(d);
- synchronise_pagetables(~0UL);
+ synchronise_lazy_execstate(~0UL);
printk("pt base=%lx sh_info=%x\n",
pagetable_val(d->exec_domain[0]->arch.guest_table)>>PAGE_SHIFT,
}
domain_pause(d);
- synchronise_pagetables(~0UL);
shadow_lock(d);
*/
/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ * The following functions deal with sending IPIs between CPUs.
*/
static inline int __prepare_ICR (unsigned int shortcut, int vector)
* of the value read we use an atomic rmw access to avoid costly
* cli/sti. Otherwise we use an even cheaper single atomic write
* to the APIC.
- */
+ */
unsigned int cfg;
/*
- * Wait for idle.
- */
+ * Wait for idle.
+ */
apic_wait_icr_idle();
/*
- * No need to touch the target chip field
- */
+ * No need to touch the target chip field
+ */
cfg = __prepare_ICR(shortcut, vector);
/*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
apic_write_around(APIC_ICR, cfg);
}
unsigned long cfg;
unsigned long flags;
- __save_flags(flags);
- __cli();
+ local_irq_save(flags);
-
/*
* Wait for idle.
*/
apic_wait_icr_idle();
-
+
/*
* prepare target chip field
*/
cfg = __prepare_ICR2(mask);
apic_write_around(APIC_ICR2, cfg);
-
+
/*
* program the ICR
*/
cfg = __prepare_ICR(0, vector);
-
+
/*
* Send the IPI. The write to APIC_ICR fires this off.
*/
apic_write_around(APIC_ICR, cfg);
- __restore_flags(flags);
+ local_irq_restore(flags);
}
static inline void send_IPI_allbutself(int vector)
{
/*
- * if there are no other CPUs in the system then
- * we get an APIC send error if we try to broadcast.
- * thus we have to avoid sending IPIs in this case.
+ * If there are no other CPUs in the system then we get an APIC send error
+ * if we try to broadcast. Thus we have to avoid sending IPIs in this case.
*/
- if (!(smp_num_cpus > 1))
+ if ( smp_num_cpus <= 1 )
return;
__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
}
-/*
- * ********* XEN NOTICE **********
- * I've left the following comments lying around as they look liek they might
- * be useful to get multiprocessor guest OSes going. However, I suspect the
- * issues we face will be quite different so I've ripped out all the
- * TLBSTATE logic (I didn't understand it anyway :-). These comments do
- * not apply to Xen, therefore! -- Keir (8th Oct 2003).
- */
-/*
- * Smarter SMP flushing macros.
- * c/o Linus Torvalds.
- *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (Its not allowed anyway).
- *
- * Optimizations Manfred Spraul <manfred@colorfullife.com>
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * for the wrong mm, and in the worst case we perform a superflous
- * tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- * was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- * cpu_tlbstate[].active_mm is correct, cpu0 already handles
- * flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- *
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
static unsigned long flush_cpumask;
{
ASSERT(local_irq_is_enabled());
- if ( mask & (1 << smp_processor_id()) )
+ if ( mask & (1UL << smp_processor_id()) )
{
local_flush_tlb();
- mask &= ~(1 << smp_processor_id());
+ mask &= ~(1UL << smp_processor_id());
}
if ( mask != 0 )
{
spin_lock(&flush_lock);
-
flush_cpumask = mask;
send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
while ( flush_cpumask != 0 )
cpu_relax();
-
spin_unlock(&flush_lock);
}
}
if ( smp_num_cpus > 1 )
{
spin_lock(&flush_lock);
- flush_cpumask = ((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id());
+ flush_cpumask = (1UL << smp_num_cpus) - 1;
+ flush_cpumask &= ~(1UL << smp_processor_id());
send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
while ( flush_cpumask != 0 )
cpu_relax();
tlbflush_clock++;
}
-static void flush_tlb_all_pge_ipi(void* info)
+static void flush_tlb_all_pge_ipi(void *info)
{
__flush_tlb_pge();
}
void flush_tlb_all_pge(void)
{
- smp_call_function (flush_tlb_all_pge_ipi,0,1,1);
+ smp_call_function(flush_tlb_all_pge_ipi, 0, 1, 1);
__flush_tlb_pge();
}
void smp_send_event_check_mask(unsigned long cpu_mask)
{
- cpu_mask &= ~(1<<smp_processor_id());
+ cpu_mask &= ~(1UL << smp_processor_id());
if ( cpu_mask != 0 )
send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
}
/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
+ * Structure and data for smp_call_function().
*/
-static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
struct call_data_struct {
void (*func) (void *info);
void *info;
- atomic_t started;
- atomic_t finished;
+ unsigned long started;
+ unsigned long finished;
int wait;
};
-static struct call_data_struct * call_data;
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+static struct call_data_struct *call_data;
/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler, or bottom halfs.
+ * Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, spin until function has completed on other CPUs.
+ * Returns: 0 on success, else a negative status code.
*/
+int smp_call_function(
+ void (*func) (void *info), void *info, int unused, int wait)
{
struct call_data_struct data;
- int cpus = smp_num_cpus-1;
+ unsigned long cpuset;
+
+ ASSERT(local_irq_is_enabled());
- if (!cpus)
+ cpuset = ((1UL << smp_num_cpus) - 1) & ~(1UL << smp_processor_id());
+ if ( cpuset == 0 )
return 0;
data.func = func;
data.info = info;
- atomic_set(&data.started, 0);
+ data.started = data.finished = 0;
data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
+
+ spin_lock(&call_lock);
+
+ call_data = &data;
+ wmb();
+
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ while ( (wait ? data.finished : data.started) != cpuset )
+ cpu_relax();
+
+ spin_unlock(&call_lock);
+
+ return 0;
+}
+
+/* Run a function on a subset of CPUs (may include local CPU). */
+int smp_subset_call_function(
+ void (*func) (void *info), void *info, int wait, unsigned long cpuset)
+{
+ struct call_data_struct data;
ASSERT(local_irq_is_enabled());
+ if ( cpuset & (1UL << smp_processor_id()) )
+ {
+ local_irq_disable();
+ (*func)(info);
+ local_irq_enable();
+ }
+
+ cpuset &= ((1UL << smp_num_cpus) - 1) & ~(1UL << smp_processor_id());
+ if ( cpuset == 0 )
+ return 0;
+
+ data.func = func;
+ data.info = info;
+ data.started = data.finished = 0;
+ data.wait = wait;
+
spin_lock(&call_lock);
call_data = &data;
wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- barrier();
+ send_IPI_mask(cpuset, CALL_FUNCTION_VECTOR);
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- barrier();
+ while ( (wait ? data.finished : data.started) != cpuset )
+ cpu_relax();
spin_unlock(&call_lock);
return 0;
}
-static void stop_this_cpu (void * dummy)
+static void stop_this_cpu (void *dummy)
{
- /*
- * Remove this CPU:
- */
clear_bit(smp_processor_id(), &cpu_online_map);
- __cli();
+
disable_local_APIC();
- for(;;) __asm__("hlt");
-}
-/*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
+}
void smp_send_stop(void)
{
+ /* Stop all other CPUs in the system. */
smp_call_function(stop_this_cpu, NULL, 1, 0);
smp_num_cpus = 1;
- __cli();
+ local_irq_disable();
disable_local_APIC();
- __sti();
+ local_irq_enable();
}
-/*
- * Nothing to do, as all the work is done automatically when
- * we return from the interrupt.
- */
asmlinkage void smp_event_check_interrupt(void)
{
ack_APIC_irq();
{
void (*func) (void *info) = call_data->func;
void *info = call_data->info;
- int wait = call_data->wait;
ack_APIC_irq();
perfc_incrc(ipis);
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- (*func)(info);
- if (wait) {
+ if ( call_data->wait )
+ {
+ (*func)(info);
+ mb();
+ set_bit(smp_processor_id(), &call_data->finished);
+ }
+ else
+ {
mb();
- atomic_inc(&call_data->finished);
+ set_bit(smp_processor_id(), &call_data->started);
+ (*func)(info);
}
}
}
}
-/*
- * Allows shooting down of borrowed page-table use on specific CPUs.
- * Specifically, we borrow page tables when running the idle domain.
- */
-static void __synchronise_pagetables(void *mask)
-{
- struct exec_domain *ed = current;
- if ( ((unsigned long)mask & (1 << ed->processor)) &&
- is_idle_task(ed->domain) )
- write_ptbase(ed);
-}
-void synchronise_pagetables(unsigned long cpu_mask)
-{
- __synchronise_pagetables((void *)cpu_mask);
- smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
-}
long do_stack_switch(unsigned long ss, unsigned long esp)
{
}
}
-/*
- * Allows shooting down of borrowed page-table use on specific CPUs.
- * Specifically, we borrow page tables when running the idle domain.
- */
-static void __synchronise_pagetables(void *mask)
-{
- struct exec_domain *ed = current;
- if ( ((unsigned long)mask & (1 << ed->processor)) &&
- is_idle_task(ed->domain) )
- write_ptbase(ed);
-}
-void synchronise_pagetables(unsigned long cpu_mask)
-{
- __synchronise_pagetables((void *)cpu_mask);
- smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
-}
-
long do_stack_switch(unsigned long ss, unsigned long esp)
{
if ( (ss & 3) != 3 )
else
{
exec_domain_pause(ed);
- synchronise_pagetables(~0UL);
if ( ed->processor != (cpu % smp_num_cpus) )
set_bit(EDF_MIGRATED, &ed->ed_flags);
set_bit(EDF_CPUPINNED, &ed->ed_flags);
{
int i, drop_dom_ref;
struct domain *d = page_get_owner(pg);
- struct exec_domain *ed;
- int cpu_mask = 0;
ASSERT(!in_irq());
/* NB. May recursively lock from domain_relinquish_memory(). */
spin_lock_recursive(&d->page_alloc_lock);
- for_each_exec_domain ( d, ed )
- cpu_mask |= 1 << ed->processor;
-
for ( i = 0; i < (1 << order); i++ )
{
ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
pg[i].tlbflush_timestamp = tlbflush_current_time();
- pg[i].u.free.cpu_mask = cpu_mask;
+ pg[i].u.free.cpu_mask = d->cpuset;
list_del(&pg[i].list);
}
void sched_rem_domain(struct exec_domain *ed)
{
-
rem_ac_timer(&ed->timer);
SCHED_OP(rem_task, ed);
TRACE_3D(TRC_SCHED_DOM_REM, ed->domain->id, ed->eid, ed);
int check_descriptor(struct desc_struct *d);
-/*
- * Use currently-executing domain's pagetables on the specified CPUs.
- * i.e., stop borrowing someone else's tables if you are the idle domain.
- */
-void synchronise_pagetables(unsigned long cpu_mask);
-
/*
* The MPT (machine->physical mapping table) is an array of word-sized
* values, indexed on machine frame number. It is expected that guest OSes
* ptr[:2] -- Machine address of new page-table base to install in MMU
* when in user space.
*
- * val[7:0] == MMUEXT_TLB_FLUSH:
- * No additional arguments.
+ * val[7:0] == MMUEXT_TLB_FLUSH_LOCAL:
+ * No additional arguments. Flushes local TLB.
*
- * val[7:0] == MMUEXT_INVLPG:
- * ptr[:2] -- Linear address to be flushed from the TLB.
+ * val[7:0] == MMUEXT_INVLPG_LOCAL:
+ * ptr[:2] -- Linear address to be flushed from the local TLB.
*
* val[7:0] == MMUEXT_FLUSH_CACHE:
* No additional arguments. Writes back and flushes cache contents.
* val[7:0] == MMUEXT_REASSIGN_PAGE:
* ptr[:2] -- A machine address within the page to be reassigned to the FD.
* (NB. page must currently belong to the calling domain).
+ *
+ * val[7:0] == MMUEXT_TLB_FLUSH_MULTI:
+ * Flush TLBs of VCPUs specified in @mask.
+ *
+ * val[7:0] == MMUEXT_INVLPG_MULTI:
+ * ptr[:2] -- Linear address to be flushed from TLB of VCPUs in @mask.
*/
#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */
#define MMU_MACHPHYS_UPDATE 2 /* ptr = MA of frame to modify entry for */
#define MMUEXT_PIN_L4_TABLE 3 /* ptr = MA of frame to pin */
#define MMUEXT_UNPIN_TABLE 4 /* ptr = MA of frame to unpin */
#define MMUEXT_NEW_BASEPTR 5 /* ptr = MA of new pagetable base */
-#define MMUEXT_TLB_FLUSH 6 /* ptr = NULL */
-#define MMUEXT_INVLPG 7 /* ptr = VA to invalidate */
+#define MMUEXT_TLB_FLUSH_LOCAL 6 /* ptr = NULL */
+#define MMUEXT_INVLPG_LOCAL 7 /* ptr = VA to invalidate */
#define MMUEXT_FLUSH_CACHE 8
#define MMUEXT_SET_LDT 9 /* ptr = VA of table; val = # entries */
#define MMUEXT_SET_FOREIGNDOM 10 /* val[31:16] = dom */
#define MMUEXT_TRANSFER_PAGE 12 /* ptr = MA of frame; val[31:16] = dom */
#define MMUEXT_REASSIGN_PAGE 13
#define MMUEXT_NEW_USER_BASEPTR 14
+#define MMUEXT_TLB_FLUSH_MULTI 15 /* ptr = NULL; mask = VCPUs to flush */
+#define MMUEXT_INVLPG_MULTI 16 /* ptr = VA to inval.; mask = VCPUs */
#define MMUEXT_CMD_MASK 255
#define MMUEXT_CMD_SHIFT 8
#define UVMF_FLUSH_TLB 1 /* Flush entire TLB. */
#define UVMF_INVLPG 2 /* Flush the VA mapping being updated. */
+/* Backwards source compatibility. */
+#define MMUEXT_TLB_FLUSH MMUEXT_TLB_FLUSH_LOCAL
+#define MMUEXT_INVLPG MMUEXT_INVLPG_LOCAL
/*
* Commands to HYPERVISOR_sched_op().
*/
typedef struct
{
- memory_t ptr; /* Machine address of PTE. */
- memory_t val; /* New contents of PTE. */
+ memory_t ptr; /* Machine address of PTE. */
+ memory_t val; /* New contents of PTE. */
+ /*unsigned long mask;*/ /* VCPU mask (certain extended commands). */
} PACKED mmu_update_t;
/*
struct exec_domain *exec_domain[MAX_VIRT_CPUS];
+ /* Bitmask of CPUs on which this domain is running. */
+ unsigned long cpuset;
+
struct arch_domain arch;
};
void domain_wake(struct exec_domain *d);
void domain_sleep(struct exec_domain *d);
+/*
+ * Force loading of currently-executing domain state on the specified set
+ * of CPUs. This is used to counteract lazy state switching where required.
+ */
+void synchronise_lazy_execstate(unsigned long cpuset);
+
extern void context_switch(
struct exec_domain *prev,
struct exec_domain *next);
ASSERT(ed != current);
atomic_inc(&ed->pausecnt);
domain_sleep(ed);
+ synchronise_lazy_execstate(ed->domain->cpuset & (1UL << ed->processor));
}
static inline void domain_pause(struct domain *d)
{
struct exec_domain *ed;
- for_each_exec_domain(d, ed)
- exec_domain_pause(ed);
+ for_each_exec_domain( d, ed )
+ {
+ ASSERT(ed != current);
+ atomic_inc(&ed->pausecnt);
+ domain_sleep(ed);
+ }
+
+ synchronise_lazy_execstate(d->cpuset);
}
static inline void exec_domain_unpause(struct exec_domain *ed)
{
struct exec_domain *ed;
- for_each_exec_domain(d, ed)
+ for_each_exec_domain( d, ed )
exec_domain_unpause(ed);
}
domain_wake(ed);
}
-static inline void domain_unblock(struct domain *d)
-{
- struct exec_domain *ed;
-
- for_each_exec_domain(d, ed)
- exec_domain_unblock(ed);
-}
-
static inline void domain_pause_by_systemcontroller(struct domain *d)
{
struct exec_domain *ed;
- for_each_exec_domain(d, ed) {
+ for_each_exec_domain ( d, ed )
+ {
ASSERT(ed != current);
if ( !test_and_set_bit(EDF_CTRLPAUSE, &ed->ed_flags) )
domain_sleep(ed);
}
+
+ synchronise_lazy_execstate(d->cpuset);
}
static inline void domain_unpause_by_systemcontroller(struct domain *d)
{
struct exec_domain *ed;
- for_each_exec_domain(d, ed) {
+ for_each_exec_domain ( d, ed )
+ {
if ( test_and_clear_bit(EDF_CTRLPAUSE, &ed->ed_flags) )
domain_wake(ed);
}
/*
* Call a function on all other processors
*/
-extern int smp_call_function (void (*func) (void *info), void *info,
- int retry, int wait);
+extern int smp_call_function(
+ void (*func) (void *info), void *info, int retry, int wait);
+extern int smp_subset_call_function(
+ void (*func) (void *info), void *info, int wait, unsigned long cpuset);
/*
* True once the per process idle is forked
#define kernel_lock()
#define cpu_logical_map(cpu) 0
#define cpu_number_map(cpu) 0
-#define smp_call_function(func,info,retry,wait) ({ 0; })
+#define smp_call_function(func,info,retry,wait) 0
+#define smp_subset_call_function(f,i,w,c) ({ if ( (c&1) ) (*f)(i); 0; })
#define cpu_online_map 1
#endif